Where are Food Deserts in Chicago?


In [31]:
%matplotlib inline
import os
import sys

from dateutil.parser import parse as dtparse

import numpy as np
from scipy.interpolate import griddata
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gp
from geopy import distance
from shapely import geometry
from IPython.display import clear_output

#This one is optional, but makes the plots look nicer:
import seaborn as sns
sns.set_context('poster')

Data was provided by Chicago, and github user talllguy added GeoJSON support for grocery chain data: https://github.com/talllguy/food-deserts. Clone the repository and provide the filenames below:


In [2]:
geo_grocery_fname = "/home/joerg/data/chicago/food-deserts/data/geoJSON/GroceryStores2013.geojson"
pop_by_ca_fname = "../data/population-by-community-area.csv"
area_shape_fname = "/home/joerg/data/chicago/shapes/CommAreas.shp"

In [3]:
geodf = gp.read_file(area_shape_fname)
grocery = gp.GeoDataFrame()
grocery = gp.GeoDataFrame().from_file(geo_grocery_fname)
pop = pd.read_csv(pop_by_ca_fname)
G = grocery.groupby("COMMUNITY AREA NAME").aggregate({"ADDRESS": "count"}).reset_index().rename(columns={"ADDRESS": "num_stores"})

In [4]:
pop["community_area"] = [x.upper() for x in pop.community_area]
X = gp.GeoDataFrame(pd.merge(geodf, pop, left_on="COMMUNITY", right_on="community_area"))
X = gp.GeoDataFrame(pd.merge(X, G, left_on = "community_area", right_on="COMMUNITY AREA NAME"))
X["pop_per_store"] = X["pop2010"]/X["num_stores"]
X["pop_per_store_cat"] = ["4000 or less" if p < 4000 else "4000-6000" if 4000<p<=6000 else "6000-8000" if 6000<p<=8000  else
                          "8000-12000" if 8000<p<=12000 else "12000-16000" if 12000<p<=16000 else "More than 16000"
                          for p in X.pop_per_store]

In [5]:
plt.figure(figsize=(14,16))
X.plot("pop_per_store_cat", categorical=True, legend=True, colormap="cool")
print(grocery["Y COORDINATE"].min())
for _, row in grocery.iterrows():
    if float(row["Y COORDINATE"]) < 1170000:
        continue
    plt.plot(float(row["X COORDINATE"]), float(row["Y COORDINATE"]), 'd', markersize=7, color='red')
plt.title('Grocery stores 2013 and population per store per community area')
plt.gca().xaxis.set_visible(False)
plt.gca().yaxis.set_visible(False)
plt.show()


1167550.9330000000
/home/joerg/.py34/lib/python3.4/site-packages/matplotlib/font_manager.py:1236: UserWarning: findfont: Font family ['Arial'] not found. Falling back to Bitstream Vera Sans
  (prop.get_family(), self.defaultFamily[fontext]))

Distance to supermarkets

Let's now try to find the minimum distance to supermarkets and create a contour plot from that. The idea is to draw a lot of random points per community area and find the distance to the closest supermarket.


In [6]:
points_per_area = 100 #Could be done by area and population density for more even covergae

In [9]:
sample_points = {area: [] for area in X.COMMUNITY}

#Sample points randomly within area bounds and only add the ones it actually contains
for area, geom in zip(X.COMMUNITY, X.geometry):
    clear_output()
    print(area)
    while len(sample_points[area]) < points_per_area:
        x, y = np.random.uniform(geom.bounds[0], geom.bounds[2]), np.random.uniform(geom.bounds[1], geom.bounds[3])
        point = geometry.Point(x, y)
        if geom.contains(point):
            sample_points[area].append(point)


EDISON PARK

Now, for each point find the distance to the closest supermarket


In [27]:
point_distance = {area: [] for area in X.COMMUNITY}
distance.VincentyDistance.ELLIPSOID = 'WGS-84'

for area in X.COMMUNITY:
    for point in sample_points[area]:
        min_dist = np.inf
        for gx, gy in zip(grocery["X COORDINATE"], grocery["Y COORDINATE"]):
            gpoint = geometry.Point(float(gx), float(gy))
            dist = point.distance(gpoint)
            if dist < min_dist:
                min_dist = dist
        point_distance[area].append(min_dist)

In [64]:
Mi = min([min(L) for L in point_distance.values()])
Ma = max([max(L) for L in point_distance.values()])

In [85]:
sns.set_style("whitegrid")
levels = np.array([0,100,400,1000,1500,2500,5000,11000,13000])
clevels = np.array([0,2640,13000])
print(levels)
plt.figure(figsize=(14,16))
X.plot(alpha=0)
for i in range(X.shape[0]):
    area = X.COMMUNITY.iloc[i]
    poly = X.geometry.iloc[i]
    grid_x, grid_y = np.mgrid[poly.bounds[0]:poly.bounds[2]:100j, poly.bounds[1]:poly.bounds[3]:100j]
    S = sample_points[area][0]
    sp_x = [sp.x for sp in sample_points[area]]
    sp_y = [sp.y for sp in sample_points[area]]
    grid_z = griddata((sp_x, sp_y), point_distance[area], (grid_x, grid_y))
    plt.contour(grid_x, grid_y, grid_z, levels=clevels, cmap=plt.cm.gray)
    plt.contourf(grid_x, grid_y, grid_z, levels=levels, cmap=plt.cm.cool)
for _, row in grocery.iterrows():
    if float(row["Y COORDINATE"]) < 1170000:
        continue
    plt.plot(float(row["X COORDINATE"]), float(row["Y COORDINATE"]), 'd', markersize=7, color='red')
plt.gca().xaxis.set_visible(False)
plt.gca().yaxis.set_visible(False)
plt.title("Distance to closest grocery store in feet")
plt.colorbar()
plt.show()


[    0   100   400  1000  1500  2500  5000 11000 13000]

In [ ]: